# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): the CSV loaded below uses a relative path, not a path under
# /content/drive - confirm the working directory actually contains the file.
from google.colab import drive
drive.mount('/content/drive')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Load the raw case table from the current working directory.
data = pd.read_csv(filepath_or_buffer='covid_19_clean_complete.csv')
data.head()
data.isna().sum()  # number of missing values in each column

# Keep only the columns that have no missing values at all.
df = data.dropna(axis=1)
df.head()
df.shape

# Active cases: confirmed minus the ones that ended in death or recovery.
df['Still Infected'] = df['Confirmed'].sub(df['Deaths']).sub(df['Recovered'])
df.head()
# Rows belonging to the most recent reporting date.
# The dates are parsed before taking the maximum: max() over the raw strings
# compares them lexicographically, so with month/day/year text '3/9/20'
# would rank above '3/10/20' and the wrong day would be selected.
report_dates = pd.to_datetime(df['Date'])
full_latest = df[report_dates == report_dates.max()].reset_index()

# Per-country totals for that date.
# Several columns must be selected from a groupby with a list (double
# brackets); the bare-tuple form was deprecated in pandas 1.0 and later removed.
full_latest_grouped = (
    full_latest
    .groupby('Country/Region')[['Confirmed', 'Deaths', 'Recovered', 'Still Infected']]
    .sum()
    .reset_index()
)
full_latest_grouped.head()
# For every country, the largest value found in any of its rows for each metric.
# A list (double brackets) is used to pick the columns: selecting several
# groupby columns with a bare tuple was deprecated in pandas 1.0 and later removed.
temp = df.groupby('Country/Region')[['Confirmed', 'Deaths', 'Recovered', 'Still Infected']].max()
temp.style.background_gradient(cmap='Pastel1_r')
# Totals over all rows for each date.
# A list (double brackets) is used to pick the columns: selecting several
# groupby columns with a bare tuple was deprecated in pandas 1.0 and later removed.
temp = df.groupby('Date')[['Confirmed', 'Deaths', 'Recovered', 'Still Infected']].sum()
temp = temp.reset_index()

# Order newest first by the *parsed* date. Sorting the raw strings is
# lexicographic and puts e.g. '3/9/20' ahead of '3/10/20', so head(1) could
# show a day that is not the latest one.
newest_first = pd.to_datetime(temp['Date']).sort_values(ascending=False).index
temp = temp.loc[newest_first]
temp.head(1).reset_index(drop=True).style.background_gradient(cmap="Pastel1")
# Countries whose latest snapshot reports zero recoveries, largest outbreaks first.
temp = (
    full_latest_grouped
    .loc[
        full_latest_grouped['Recovered'] == 0,
        ['Country/Region', 'Confirmed', 'Deaths', 'Recovered'],
    ]
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Reds')
from plotnine import *
import plotly.express as px
import folium
import seaborn as sns
# Hex colours used by the plots, plus two ready-made three-colour sequences.
c, d, r, i = '#393e46', '#ff2e63', '#30e3ca', '#f8b400'
cdr = [c, d, r]  # grey - red - blue
idr = [i, d, r]  # yellow - red - blue
# One bar per country for confirmed cases on the latest date, tallest first,
# drawn on a logarithmic y-axis.
fig = px.bar(
    data_frame=full_latest_grouped[['Country/Region', 'Confirmed']].sort_values(
        by='Confirmed', ascending=False
    ),
    x="Country/Region",
    y="Confirmed",
    color='Country/Region',
    log_y=True,
    template='ggplot2',
    title='Confirmed Cases',
)
fig.show()
# One bar per country for deaths on the latest date, tallest first,
# drawn on a logarithmic y-axis.
fig = px.bar(
    data_frame=full_latest_grouped[['Country/Region', 'Deaths']].sort_values(
        by='Deaths', ascending=False
    ),
    x="Country/Region",
    y="Deaths",
    color='Country/Region',
    log_y=True,
    template='ggplot2',
    title='Deaths Cases',
)
fig.show()
# Totals per (country, date) pair.
# A list (double brackets) is used to pick the columns: selecting several
# groupby columns with a bare tuple was deprecated in pandas 1.0 and later removed.
temp = (
    df.groupby(['Country/Region', 'Date'])[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)

# Bar chart of confirmed cases per date, coloured by country.
fig = px.bar(
    temp,
    x="Date",
    y="Confirmed",
    color="Country/Region",
    orientation='v',
    height=600,
    title='Confirmed Cases',
    color_discrete_sequence=px.colors.cyclical.mygbm,
)
fig.show()
# Totals per date over all rows. Only the three count columns the ratios
# need are summed: a blanket .sum() would also add up the coordinate columns
# and, depending on the pandas version, warn about or try to aggregate the
# text columns.
temp = df.groupby('Date')[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
temp.head()

# Percentages of confirmed cases. Scaling before rounding keeps one clean
# decimal instead of float artefacts such as 2.1999999999999997.
temp['No. of Deaths to 100 Confirmed Cases'] = (
    temp['Deaths'] / temp['Confirmed']
).mul(100).round(1)
temp['No. of Recovered to 100 Confirmed Cases'] = (
    temp['Recovered'] / temp['Confirmed']
).mul(100).round(1)

# Recoveries per single death: this is a plain ratio, so it is NOT
# multiplied by 100 (doing so would report recoveries per 100 deaths).
temp['No. of Recovered to 1 Death Cases'] = (
    temp['Recovered'] / temp['Deaths']
).round(3)

# Long format: one row per (date, ratio name) so each ratio becomes a line.
temp = temp.melt(
    id_vars='Date',
    value_vars=[
        'No. of Deaths to 100 Confirmed Cases',
        'No. of Recovered to 100 Confirmed Cases',
        'No. of Recovered to 1 Death Cases',
    ],
    var_name='Ratio',
    value_name='Value',
)

fig = px.line(
    temp,
    x="Date",
    y="Value",
    color='Ratio',
    title='Recovery and Mortality Rate Over The Time',
    color_discrete_sequence=cdr,
)
fig.show()
# World map with one circle per row of the latest snapshot; the circle
# radius is that row's confirmed count.
m = folium.Map(
    location=[0, 0],
    tiles='cartodbpositron',
    min_zoom=1,
    max_zoom=4,
    zoom_start=1,
)

# Tooltip markup. <b> replaces the original <bold>, which is not an HTML
# element and therefore never rendered as bold; list items are now closed.
tooltip_template = (
    '<li><b>Country : </b>{country}</li>'
    '<li><b>Confirmed : </b>{confirmed}</li>'
    '<li><b>Deaths : </b>{deaths}</li>'
    '<li><b>Recovered : </b>{recovered}</li>'
)

# Each record is materialised once instead of calling .iloc[i] for every
# field, and the loop variable is not named `i`, so the palette colour
# stored in `i` is no longer overwritten by an integer.
for row in full_latest.to_dict(orient='records'):
    folium.Circle(
        location=[row['Lat'], row['Long']],
        color='crimson',
        tooltip=tooltip_template.format(
            country=row['Country/Region'],
            confirmed=row['Confirmed'],
            deaths=row['Deaths'],
            recovered=row['Recovered'],
        ),
        radius=int(row['Confirmed']),
    ).add_to(m)
m
# World choropleth restricted to countries with at least one death in the
# latest snapshot; the colour range is capped at 1-50 and the colour bar hidden.
fig = px.choropleth(
    data_frame=full_latest_grouped.loc[full_latest_grouped['Deaths'].gt(0)],
    locations="Country/Region",
    locationmode='country names',
    color="Deaths",
    hover_name="Country/Region",
    range_color=[1, 50],
    color_continuous_scale="agsunset",
    title='Countries With Deaths Reported',
)
fig.update_layout(coloraxis_showscale=False)
fig.show()
# World choropleth of confirmed cases in the latest snapshot; the colour
# range is capped at 1-2000 and the colour bar hidden.
fig = px.choropleth(
    data_frame=full_latest_grouped,
    locations="Country/Region",
    locationmode='country names',
    color="Confirmed",
    hover_name="Country/Region",
    range_color=[1, 2000],
    color_continuous_scale="aggrnyl",
    title='Countries with Confirmed Cases',
)
fig.update_layout(coloraxis_showscale=False)
fig.show()
# Largest value per (date, country) for each metric.
# A list (double brackets) is used to pick the columns: selecting several
# groupby columns with a bare tuple was deprecated in pandas 1.0 and later removed.
formated_gdf = (
    df.groupby(['Date', 'Country/Region'])[['Confirmed', 'Deaths', 'Recovered']]
    .max()
    .reset_index()
)

# Leave China out. .copy() makes the result an independent frame, so the
# column assignments below do not raise SettingWithCopyWarning.
formated_gdf = formated_gdf[formated_gdf['Country/Region'] != 'China'].copy()

# Normalise the date text to month-day-year digits (e.g. '012220').
formated_gdf['Date'] = pd.to_datetime(formated_gdf['Date']).dt.strftime('%m%d%y')

# Square root of the confirmed count, stored for use as a marker size.
formated_gdf['size'] = formated_gdf['Confirmed'].pow(0.5)

# China was already removed above, so the frame is passed as-is rather than
# being filtered a second time.
fig = px.scatter_geo(
    formated_gdf,
    locations='Country/Region',
    locationmode='country names',
)